#General

library(tidyverse) # includes ggplot2, for data visualisation. dplyr, for data manipulation.
library(RColorBrewer) # for a colourful plot
library(ggrepel) # for nice annotations

# Libraries----
# Session diagnostics: the calls below are evaluated at top level and their
# results are not stored in any variable, so they are only useful for
# inspecting the output interactively.

# Matrix describing every package installed in the known library trees
installed.packages()

# Version information of the running R session
version

# Summary of the status of the installed packages
packageStatus()

# Library trees that R searches for packages
.libPaths()

#General Libraries
library(tidyverse)
library(readxl)
library(writexl)

#Volcano Plots
library(ggplot2)
library(ggrepel)
library(ggpubr)

# Refine Volcano-Plots----
# load libraries
library(ggplot2)
library(ggrepel)
library(ggpubr) #warning message: ggpubr wurde unter R Version 4.1.3 erstellt
library(readxl)
library(writexl)
library(tidyverse) # includes ggplot2, for data visualisation. dplyr, for data manipulation.
library(RColorBrewer) # for a colourful plot
library(ggrepel) # for nice annotations
library(dplyr)

#Load data
setwd("C:/Users/Christian Schinke/Desktop/R-Kurs_Timeline_Project/cJUN")

# Read the contrast table, drop genes without an adjusted p-value and sort by
# absolute log2 fold change (largest effect first).
RNA_SP <- read_excel(path = "01_contrast.DMSO1_vs_SP_ID0_log2FC_correct.xlsx") %>%
  as.data.frame() %>%
  filter(!is.na(padj)) %>%
  arrange(desc(abs(log2FoldChange)))

#Define up- and downregulated as well as n.s. significant differential regulated genes
# A gene is "UP"/"DOWN" when |log2FoldChange| >= 0.5 and padj < 0.05; everything
# else (including rows with a missing log2FoldChange) stays "NO".
RNA_SP <- RNA_SP %>%
  mutate(
    diffexpr = case_when(
      log2FoldChange >= 0.5 & padj < 0.05 ~ "UP",
      log2FoldChange <= -0.5 & padj < 0.05 ~ "DOWN",
      TRUE ~ "NO"
    )
  )

#Label genes
# Only differentially expressed genes get a label; all others stay NA so that
# geom_text_repel() does not draw text for them.
RNA_SP$Labels <- ifelse(RNA_SP$diffexpr != "NO", RNA_SP$symbol, NA)

#VolcanoPlots
#SP_only
# Named colour vector: each category is tied to its colour by name. An unnamed
# vector is matched by position, so the colours would silently shift whenever
# one category (e.g. "DOWN") is absent from the data.
diffexpr_colours <- c(DOWN = "darkblue", NO = "black", UP = "darkred")

volcano_sp <- ggplot(
  data = RNA_SP,
  aes(x = log2FoldChange, y = -log10(padj), col = diffexpr, label = Labels)
) +
  geom_point(size = 2, alpha = 0.8) +
  # organize labels nicely with ggrepel --> geom_text_repel()
  geom_text_repel(max.overlaps = 12, size = 4.5) +
  scale_color_manual(values = diffexpr_colours) + # define color scheme
  # dashed vertical line at no change, dot-dashed horizontal line at padj = 0.05
  geom_vline(xintercept = 0, colour = "black", linetype = 2) +
  geom_hline(yintercept = -log10(0.05), colour = "black", linetype = 4) +
  # points outside these limits are removed from the plot (ggplot2 warns)
  xlim(-5, 5) +
  ylim(-0.5, 25) +
  theme(text = element_text(size = 14))

volcano_sp

# Pass the plot explicitly instead of relying on last_plot()
ggsave("SP_only.tiff", plot = volcano_sp, width = 8, height = 8, dpi = 600)


# Show the top correlated genes to JUN----
# Load necessary libraries
library(readxl)
library(pheatmap)
library(RColorBrewer)

# Clear environment
rm(list=ls())  

# Set working directory
setwd("C:/Users/Christian Schinke/Desktop/R-Kurs_Timeline_Project/cJUN/")

# Read Excel file and convert to a plain data frame
RNA_RLD <- as.data.frame(read_excel("DESeq2.all.rld.blind.xlsx"))

# Remove rows where the symbol (second column) is NA
RNA_RLD <- RNA_RLD[!is.na(RNA_RLD[, 2]), ]

# Store symbol column before removing it
gene_symbols <- RNA_RLD[, 2]

# Ensure unique, syntactically valid row names
# (make.names() also rewrites characters such as "-" to ".")
rownames(RNA_RLD) <- make.names(gene_symbols, unique = TRUE)

# Remove the first two columns (gene_id and symbol)
RNA_RLD <- RNA_RLD[, -c(1, 2)]

# Numeric matrix: genes in rows, remaining columns (samples) as columns
expr_mat <- as.matrix(RNA_RLD)

# Ensure JUN is in the dataset
if ("JUN" %in% rownames(expr_mat)) {

  # Correlate JUN with every gene. Only this single row of the gene-by-gene
  # correlation matrix is needed, so the full (genes x genes) matrix is never
  # built; the values are the same as cor(t(expr_mat))["JUN", ].
  jun_cor <- cor(t(expr_mat), expr_mat["JUN", ])[, 1]

  # Exclude JUN itself and select up to 50 genes with the highest absolute
  # correlation. sort() drops NA correlations (e.g. constant genes) and head()
  # copes with fewer than 50 candidates without producing NA names.
  other_cor <- jun_cor[names(jun_cor) != "JUN"]
  top_50_genes <- names(head(sort(abs(other_cor), decreasing = TRUE), 50))

  if (length(top_50_genes) == 0) {
    stop("No gene with a defined correlation to JUN was found.", call. = FALSE)
  }

  # Correlation matrix for JUN and the selected genes only
  selected_genes <- c("JUN", top_50_genes)
  cor_mat_top50 <- cor(t(expr_mat[selected_genes, , drop = FALSE]))

  # Define custom color palette
  colors <- colorRampPalette(c("darkblue", "white", "darkred"))(255)

  # Define unique breaks: 256 break points for 255 colors. The outer ranges
  # (|r| > 0.8) receive as many colors as the whole inner range, so strong
  # correlations are resolved more finely.
  breaks <- c(seq(-1, -0.8001, length.out = 85),
              seq(-0.8, 0.8, length.out = 86),
              seq(0.8001, 1, length.out = 85))

  # Open TIFF device (300 DPI for high quality)
  tiff("Heatmap_Top50_Genes_JUN.tiff", width = 2500, height = 2000, res = 300)

  # Generate heatmap for JUN and the top correlated genes. The device is
  # closed in `finally` so it does not stay open if pheatmap() fails.
  invisible(tryCatch(
    pheatmap(cor_mat_top50,
             color = colors,
             fontsize_row = 5,
             fontsize_col = 5,
             main = "Top 50 Genes Correlated with JUN",
             breaks = breaks),
    finally = dev.off()
  ))

} else {
  print("JUN not found in the dataset.")
}

# Show the top correlated genes to JUN, only CTX-treated samples and vehicle, no SP-treated samples, correlation matrix----
# Load necessary libraries
library(readxl)
library(writexl)
library(pheatmap)
library(RColorBrewer)

# Clear environment
rm(list=ls())  

# Set working directory
setwd("C:/Users/Christian Schinke/Desktop/R-Kurs_Timeline_Project/cJUN/")

# Read Excel file
RNA_RLD <- read_excel("DESeq2.all.rld.blind_CTX_and_vehicle_treated.xlsx")

# Convert to data frame
RNA_RLD <- as.data.frame(RNA_RLD)

# Ensure row names are gene names (second column), made unique
rownames(RNA_RLD) <- make.unique(as.character(RNA_RLD[, 2]))

# Remove first two columns (non-numeric)
RNA_RLD <- RNA_RLD[, -c(1, 2)]

# Fail early with a clear message; otherwise a missing JUN row only surfaces
# as an obscure error inside cor.test().
if (!"JUN" %in% rownames(RNA_RLD)) {
  stop("JUN not found in the dataset.", call. = FALSE)
}
jun_values <- as.numeric(RNA_RLD["JUN", ])  # Convert to numeric

# Compute Spearman correlation and p-value for all genes with JUN.
# The result has one column per gene and the rows "correlation.rho" and
# "p_value". The estimate is named explicitly: cor.test() returns it as a
# vector named "rho", which c() would otherwise paste onto the prefix.
cor_results <- apply(as.matrix(RNA_RLD), 1, function(x) {
  x <- as.numeric(x)  # Ensure x is numeric
  test <- cor.test(x, jun_values, method = "spearman", exact = FALSE)
  c(correlation.rho = unname(test$estimate), p_value = test$p.value)
})

# Convert results to a data frame (one row per gene)
cor_df <- as.data.frame(t(cor_results))
cor_df$Gene <- rownames(RNA_RLD)  # Add gene names

# Sort by absolute correlation (highest first). The column is addressed by
# its full name instead of relying on partial matching of `$`.
cor_df <- cor_df[order(-abs(cor_df[["correlation.rho"]])), ]

# Save as Excel file
write_xlsx(cor_df, "JUN_correlation_CTX_and_vehicle_treated.xlsx")

#


# Show the top correlated genes to JUN, excel with gene_id----
# Load necessary libraries
library(readxl)
library(writexl)
library(pheatmap)
library(RColorBrewer)

# Clear environment
rm(list=ls())  

# Set working directory
setwd("C:/Users/Christian Schinke/Desktop/R-Kurs_Timeline_Project/cJUN/")

# Read Excel file
RNA_RLD <- read_excel("DESeq2.all.rld.blind_CTX_and_vehicle_treated.xlsx")

# Convert to data frame
RNA_RLD <- as.data.frame(RNA_RLD)

# Extract gene_id (first column); kept in the original row order so it can be
# attached to the results before they are sorted
gene_ids <- RNA_RLD[, 1]

# Ensure row names are gene names (column 2), made unique
rownames(RNA_RLD) <- make.unique(as.character(RNA_RLD[, 2]))

# Remove first two columns (non-numeric)
RNA_RLD <- RNA_RLD[, -c(1, 2)]

# Fail early with a clear message; otherwise a missing JUN row only surfaces
# as an obscure error inside cor.test().
if (!"JUN" %in% rownames(RNA_RLD)) {
  stop("JUN not found in the dataset.", call. = FALSE)
}

# Convert JUN expression values to numeric
jun_values <- as.numeric(RNA_RLD["JUN", ])

# Compute Spearman correlation and p-value for all genes with JUN.
# The result has one column per gene and the rows "correlation.rho" and
# "p_value". The estimate is named explicitly: cor.test() returns it as a
# vector named "rho", which c() would otherwise paste onto the prefix.
cor_results <- apply(as.matrix(RNA_RLD), 1, function(x) {
  x <- as.numeric(x)  # Ensure numeric
  test <- cor.test(x, jun_values, method = "spearman", exact = FALSE)
  c(correlation.rho = unname(test$estimate), p_value = test$p.value)
})

# Convert results to a data frame (one row per gene)
cor_df <- as.data.frame(t(cor_results))
cor_df$Gene <- rownames(RNA_RLD)  # Add gene names
cor_df$Gene_ID <- gene_ids  # Add gene_id from original data

# Sort by absolute correlation (highest first). The column is addressed by
# its full name instead of relying on partial matching of `$`.
cor_df <- cor_df[order(-abs(cor_df[["correlation.rho"]])), ]

# Save as Excel file
write_xlsx(cor_df, "JUN_correlation_CTX_and_vehicle_treated_with_gene_id.xlsx")

#Figure with JUN correlated genes that are differentially expressed in at least 3 data sets----
library(ggplot2)
library(ggrepel)
library(readxl)
library(tidyverse)

# Clear environment
rm(list=ls())  

# Set working directory
setwd("C:/Users/Christian Schinke/Desktop/R-Kurs_Timeline_Project/cJUN/")

# Read the table of selected genes and prepare it for plotting:
#  * p_value == 0 is replaced by 1.6e-10 so the log10 colour scale is defined
#  * rows with correlation.rho outside [-1, 1] are dropped
#  * symbol becomes a factor with reversed order of first appearance
Genes_of_interest <- read_excel("JUN_correlation_figure_20250327.xlsx") %>%
  as.data.frame() %>%
  mutate(p_value = replace(p_value, which(p_value == 0), 1.60E-10)) %>%
  filter(correlation.rho >= -1, correlation.rho <= 1) %>%
  mutate(symbol = factor(symbol, levels = rev(unique(symbol))))

# ---- Building blocks shared by the three plots below ----

# Tick positions on the correlation axis
rho_breaks <- seq(-1, 1, by = 0.2)

# Zero reference line plus one horizontal segment per gene from 0 to its rho,
# coloured by p-value
lollipop_base <- function(data) {
  ggplot(data, aes(x = correlation.rho, y = symbol, color = p_value)) +
    geom_vline(xintercept = 0, color = "black", linewidth = 0.5) +
    geom_segment(
      aes(x = 0, xend = correlation.rho, y = symbol, yend = symbol),
      linewidth = 1
    )
}

# Log10 colour scale for the p-values with fixed limits and labels
pvalue_colour_scale <- function() {
  scale_color_gradientn(
    colors = c("red", "darkred", "darkblue"),
    trans = "log10",
    limits = c(1.6e-10, 5e-5),
    breaks = c(1.6e-10, 1e-8, 1e-6, 5e-5),
    labels = c("1.6e-10", "1e-8", "1e-6", "5e-5")
  )
}

# Fill scale for the abundance squares, with the colour bars ordered
# p-value first and abundance second
abundance_fill_layers <- function() {
  list(
    scale_fill_gradient(
      low = "#94D2BD",
      high = "#EE9B00",
      limits = c(3.9, 14)
    ),
    guides(
      color = guide_colorbar(order = 1),
      fill = guide_colorbar(order = 2)
    )
  )
}

# theme_bw() with dashed light-grey vertical grid lines and the given font sizes
lollipop_theme <- function(axis_text, axis_title, legend_text, legend_title) {
  list(
    theme_bw(),
    theme(
      panel.grid.major.x = element_line(color = "grey90", linetype = "dashed"),
      axis.text = element_text(size = axis_text),
      axis.title = element_text(size = axis_title),
      legend.text = element_text(size = legend_text),
      legend.title = element_text(size = legend_title)
    )
  )
}

# ---- Plot 1: correlation only ----
p <- lollipop_base(Genes_of_interest) +
  scale_x_continuous(limits = c(-1, 1), breaks = rho_breaks) +
  labs(x = "correlation.rho", y = "", color = "p-value") +
  pvalue_colour_scale() +
  lollipop_theme(axis_text = 9, axis_title = 10, legend_text = 8, legend_title = 10)

p


# Plot with RLD
# ---- Plot 2: squares at x = -1.5, filled by the RLD column ----
p <- lollipop_base(Genes_of_interest) +
  # inherit.aes = FALSE gives this layer its own aesthetics
  geom_point(aes(x = -1.5, y = symbol, fill = RLD),
             shape = 22, color = "white", size = 3, inherit.aes = FALSE) +
  scale_x_continuous(limits = c(-1.5, 1), breaks = rho_breaks) +
  labs(x = "correlation.rho", y = "", color = "p-value", fill = "Abundance [RLD]") +
  pvalue_colour_scale() +
  abundance_fill_layers() +
  lollipop_theme(axis_text = 9, axis_title = 10, legend_text = 8, legend_title = 10)

p

# ---- Plot 3: squares at x = -1, filled by Averaged_RLD_all_Samples,
#      with larger fonts; this is the version that gets saved ----
p <- lollipop_base(Genes_of_interest) +
  # inherit.aes = FALSE gives this layer its own aesthetics
  geom_point(aes(x = -1, y = symbol, fill = Averaged_RLD_all_Samples),
             shape = 22, color = "white", size = 3, inherit.aes = FALSE) +
  scale_x_continuous(limits = c(-1.1, 1), breaks = rho_breaks) +
  labs(x = "correlation.rho", y = "", color = "p-value", fill = "Abundance [RLD]") +
  pvalue_colour_scale() +
  abundance_fill_layers() +
  lollipop_theme(axis_text = 12, axis_title = 11, legend_text = 9, legend_title = 11)

p

# Save the plot as a TIFF file
# NOTE(review): "FLD" in the file name looks like a typo for "RLD" - confirm
# before renaming, other scripts may reference this file.
ggsave(filename = "JUN_Genes_of_interest_plot_with_FLD.tiff", plot = p, device = "tiff", width = 8, height = 10, dpi = 300)
